{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building Advanced RAG With LlamaParse\n",
    "\n",
    "In this notebook we will demonstrate the following:\n",
    "\n",
    "1. Using LlamaParse.\n",
    "2. Using Recursive Retrieval with LlamaParse to query tables/ text within a document hierarchically.\n",
    "\n",
    "[LlamaParse Documentation](https://github.com/run-llama/llama_parse/)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting llama-index\n",
      "  Downloading llama_index-0.11.5-py3-none-any.whl.metadata (11 kB)\n",
      "Collecting llama-index-agent-openai<0.4.0,>=0.3.0 (from llama-index)\n",
      "  Downloading llama_index_agent_openai-0.3.0-py3-none-any.whl.metadata (728 bytes)\n",
      "Collecting llama-index-cli<0.4.0,>=0.3.0 (from llama-index)\n",
      "  Downloading llama_index_cli-0.3.0-py3-none-any.whl.metadata (1.5 kB)\n",
      "Collecting llama-index-core<0.12.0,>=0.11.5 (from llama-index)\n",
      "  Downloading llama_index_core-0.11.5-py3-none-any.whl.metadata (2.4 kB)\n",
      "Collecting llama-index-embeddings-openai<0.3.0,>=0.2.4 (from llama-index)\n",
      "  Downloading llama_index_embeddings_openai-0.2.4-py3-none-any.whl.metadata (635 bytes)\n",
      "Collecting llama-index-indices-managed-llama-cloud>=0.3.0 (from llama-index)\n",
      "  Downloading llama_index_indices_managed_llama_cloud-0.3.0-py3-none-any.whl.metadata (3.8 kB)\n",
      "Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)\n",
      "  Downloading llama_index_legacy-0.9.48.post3-py3-none-any.whl.metadata (8.5 kB)\n",
      "Collecting llama-index-llms-openai<0.3.0,>=0.2.2 (from llama-index)\n",
      "  Downloading llama_index_llms_openai-0.2.2-py3-none-any.whl.metadata (705 bytes)\n",
      "Collecting llama-index-multi-modal-llms-openai<0.3.0,>=0.2.0 (from llama-index)\n",
      "  Downloading llama_index_multi_modal_llms_openai-0.2.0-py3-none-any.whl.metadata (728 bytes)\n",
      "Collecting llama-index-program-openai<0.3.0,>=0.2.0 (from llama-index)\n",
      "  Downloading llama_index_program_openai-0.2.0-py3-none-any.whl.metadata (766 bytes)\n",
      "Collecting llama-index-question-gen-openai<0.3.0,>=0.2.0 (from llama-index)\n",
      "  Downloading llama_index_question_gen_openai-0.2.0-py3-none-any.whl.metadata (785 bytes)\n",
      "Collecting llama-index-readers-file<0.3.0,>=0.2.0 (from llama-index)\n",
      "  Downloading llama_index_readers_file-0.2.0-py3-none-any.whl.metadata (5.4 kB)\n",
      "Collecting llama-index-readers-llama-parse>=0.3.0 (from llama-index)\n",
      "  Downloading llama_index_readers_llama_parse-0.3.0-py3-none-any.whl.metadata (3.5 kB)\n",
      "Collecting nltk>3.8.1 (from llama-index)\n",
      "  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)\n",
      "Collecting openai>=1.14.0 (from llama-index-agent-openai<0.4.0,>=0.3.0->llama-index)\n",
      "  Downloading openai-1.43.0-py3-none-any.whl.metadata (22 kB)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (6.0.1)\n",
      "Collecting SQLAlchemy>=1.4.49 (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading SQLAlchemy-2.0.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
      "Collecting aiohttp<4.0.0,>=3.8.6 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading aiohttp-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)\n",
      "Collecting dataclasses-json (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n",
      "Collecting deprecated>=1.2.9.3 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)\n",
      "Collecting dirtyjson<2.0.0,>=1.0.8 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading dirtyjson-1.0.8-py3-none-any.whl.metadata (11 kB)\n",
      "Collecting fsspec>=2023.5.0 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
      "Collecting httpx (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (1.5.8)\n",
      "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (3.0)\n",
      "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (1.24.1)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (9.3.0)\n",
      "Collecting pydantic<3.0.0,>=2.7.0 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.2/125.2 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.5->llama-index) (2.31.0)\n",
      "Collecting tenacity!=8.4.0,<9.0.0,>=8.2.0 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)\n",
      "Collecting tiktoken>=0.3.3 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
      "Collecting tqdm<5.0.0,>=4.66.1 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m24.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting typing-extensions>=4.5.0 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n",
      "Collecting typing-inspect>=0.8.0 (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n",
      "Collecting wrapt (from llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
      "Collecting llama-cloud>=0.0.11 (from llama-index-indices-managed-llama-cloud>=0.3.0->llama-index)\n",
      "  Downloading llama_cloud-0.0.15-py3-none-any.whl.metadata (751 bytes)\n",
      "Collecting pandas (from llama-index-legacy<0.10.0,>=0.9.48->llama-index)\n",
      "  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n",
      "Collecting beautifulsoup4<5.0.0,>=4.12.3 (from llama-index-readers-file<0.3.0,>=0.2.0->llama-index)\n",
      "  Downloading beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)\n",
      "Collecting pypdf<5.0.0,>=4.0.1 (from llama-index-readers-file<0.3.0,>=0.2.0->llama-index)\n",
      "  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)\n",
      "Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file<0.3.0,>=0.2.0->llama-index)\n",
      "  Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)\n",
      "Collecting llama-parse>=0.5.0 (from llama-index-readers-llama-parse>=0.3.0->llama-index)\n",
      "  Downloading llama_parse-0.5.2-py3-none-any.whl.metadata (4.5 kB)\n",
      "Collecting click (from nltk>3.8.1->llama-index)\n",
      "  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n",
      "Collecting joblib (from nltk>3.8.1->llama-index)\n",
      "  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n",
      "Collecting regex>=2021.8.3 (from nltk>3.8.1->llama-index)\n",
      "  Downloading regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting aiohappyeyeballs>=2.3.0 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading aiohappyeyeballs-2.4.0-py3-none-any.whl.metadata (5.9 kB)\n",
      "Collecting aiosignal>=1.1.2 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index) (23.1.0)\n",
      "Collecting frozenlist>=1.1.1 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
      "Collecting multidict<7.0,>=4.5 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n",
      "Collecting yarl<2.0,>=1.0 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading yarl-1.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.0/43.0 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting async-timeout<5.0,>=4.0 (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\n",
      "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.3.0,>=0.2.0->llama-index) (2.5)\n",
      "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.5->llama-index) (4.0.0)\n",
      "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.5->llama-index) (2022.12.7)\n",
      "Collecting httpcore==1.* (from httpx->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n",
      "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.5->llama-index) (3.4)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.5->llama-index) (1.3.0)\n",
      "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai>=1.14.0->llama-index-agent-openai<0.4.0,>=0.3.0->llama-index) (1.7.0)\n",
      "Collecting jiter<1,>=0.4.0 (from openai>=1.14.0->llama-index-agent-openai<0.4.0,>=0.3.0->llama-index)\n",
      "  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
      "Collecting annotated-types>=0.4.0 (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
      "Collecting pydantic-core==2.20.1 (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.5->llama-index) (2.1.1)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.5->llama-index) (1.26.13)\n",
      "Collecting greenlet!=0.4.17 (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
      "Collecting mypy-extensions>=0.3.0 (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n",
      "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->llama-index-core<0.12.0,>=0.11.5->llama-index)\n",
      "  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (2.8.2)\n",
      "Collecting pytz>=2020.1 (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index)\n",
      "  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)\n",
      "Collecting tzdata>=2022.7 (from pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index)\n",
      "  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
      "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.5->llama-index) (1.1.3)\n",
      "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.12.0,>=0.11.5->llama-index) (23.2)\n",
      "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas->llama-index-legacy<0.10.0,>=0.9.48->llama-index) (1.16.0)\n",
      "Downloading llama_index-0.11.5-py3-none-any.whl (6.8 kB)\n",
      "Downloading llama_index_agent_openai-0.3.0-py3-none-any.whl (13 kB)\n",
      "Downloading llama_index_cli-0.3.0-py3-none-any.whl (27 kB)\n",
      "Downloading llama_index_core-0.11.5-py3-none-any.whl (1.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m111.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading llama_index_embeddings_openai-0.2.4-py3-none-any.whl (6.1 kB)\n",
      "Downloading llama_index_indices_managed_llama_cloud-0.3.0-py3-none-any.whl (9.5 kB)\n",
      "Downloading llama_index_legacy-0.9.48.post3-py3-none-any.whl (1.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m96.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading llama_index_llms_openai-0.2.2-py3-none-any.whl (12 kB)\n",
      "Downloading llama_index_multi_modal_llms_openai-0.2.0-py3-none-any.whl (5.9 kB)\n",
      "Downloading llama_index_program_openai-0.2.0-py3-none-any.whl (5.3 kB)\n",
      "Downloading llama_index_question_gen_openai-0.2.0-py3-none-any.whl (2.9 kB)\n",
      "Downloading llama_index_readers_file-0.2.0-py3-none-any.whl (38 kB)\n",
      "Downloading llama_index_readers_llama_parse-0.3.0-py3-none-any.whl (2.5 kB)\n",
      "Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m103.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading aiohttp-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.9/147.9 kB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n",
      "Downloading dirtyjson-1.0.8-py3-none-any.whl (25 kB)\n",
      "Downloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m43.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading llama_cloud-0.0.15-py3-none-any.whl (180 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.2/180.2 kB\u001b[0m \u001b[31m45.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading llama_parse-0.5.2-py3-none-any.whl (9.5 kB)\n",
      "Downloading openai-1.43.0-py3-none-any.whl (365 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m365.7/365.7 kB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pydantic-2.8.2-py3-none-any.whl (423 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m423.9/423.9 kB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pypdf-4.3.1-py3-none-any.whl (295 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.8/295.8 kB\u001b[0m \u001b[31m60.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (776 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m776.5/776.5 kB\u001b[0m \u001b[31m104.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading SQLAlchemy-2.0.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m104.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading striprtf-0.0.26-py3-none-any.whl (6.9 kB)\n",
      "Downloading tenacity-8.5.0-py3-none-any.whl (28 kB)\n",
      "Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m92.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading tqdm-4.66.5-py3-none-any.whl (78 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.4/78.4 kB\u001b[0m \u001b[31m21.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n",
      "Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
      "Downloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (80 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.3/80.3 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading click-8.1.7-py3-none-any.whl (97 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.9/97.9 kB\u001b[0m \u001b[31m32.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
      "Downloading joblib-1.4.2-py3-none-any.whl (301 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.8/301.8 kB\u001b[0m \u001b[31m85.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m90.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading aiohappyeyeballs-2.4.0-py3-none-any.whl (12 kB)\n",
      "Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
      "Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
      "Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n",
      "Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (239 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.5/239.5 kB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (616 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m616.0/616.0 kB\u001b[0m \u001b[31m112.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.9/318.9 kB\u001b[0m \u001b[31m103.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading marshmallow-3.22.0-py3-none-any.whl (49 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (124 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.3/124.3 kB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
      "Downloading pytz-2024.1-py2.py3-none-any.whl (505 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m505.5/505.5 kB\u001b[0m \u001b[31m106.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading yarl-1.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (468 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.3/468.3 kB\u001b[0m \u001b[31m72.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: striprtf, pytz, dirtyjson, wrapt, tzdata, typing-extensions, tqdm, tenacity, regex, mypy-extensions, multidict, marshmallow, joblib, jiter, h11, greenlet, fsspec, frozenlist, click, beautifulsoup4, async-timeout, annotated-types, aiohappyeyeballs, yarl, typing-inspect, tiktoken, SQLAlchemy, pypdf, pydantic-core, pandas, nltk, httpcore, deprecated, aiosignal, pydantic, httpx, dataclasses-json, aiohttp, openai, llama-index-core, llama-cloud, llama-parse, llama-index-readers-file, llama-index-legacy, llama-index-indices-managed-llama-cloud, llama-index-embeddings-openai, llama-index-readers-llama-parse, llama-index-llms-openai, llama-index-agent-openai, llama-index-program-openai, llama-index-question-gen-openai, llama-index-multi-modal-llms-openai, llama-index-cli, llama-index\n",
      "  Attempting uninstall: typing-extensions\n",
      "    Found existing installation: typing_extensions 4.4.0\n",
      "    Uninstalling typing_extensions-4.4.0:\n",
      "      Successfully uninstalled typing_extensions-4.4.0\n",
      "  Attempting uninstall: fsspec\n",
      "    Found existing installation: fsspec 2023.4.0\n",
      "    Uninstalling fsspec-2023.4.0:\n",
      "      Successfully uninstalled fsspec-2023.4.0\n",
      "  Attempting uninstall: beautifulsoup4\n",
      "    Found existing installation: beautifulsoup4 4.12.2\n",
      "    Uninstalling beautifulsoup4-4.12.2:\n",
      "      Successfully uninstalled beautifulsoup4-4.12.2\n",
      "Successfully installed SQLAlchemy-2.0.34 aiohappyeyeballs-2.4.0 aiohttp-3.10.5 aiosignal-1.3.1 annotated-types-0.7.0 async-timeout-4.0.3 beautifulsoup4-4.12.3 click-8.1.7 dataclasses-json-0.6.7 deprecated-1.2.14 dirtyjson-1.0.8 frozenlist-1.4.1 fsspec-2024.9.0 greenlet-3.0.3 h11-0.14.0 httpcore-1.0.5 httpx-0.27.2 jiter-0.5.0 joblib-1.4.2 llama-cloud-0.0.15 llama-index-0.11.5 llama-index-agent-openai-0.3.0 llama-index-cli-0.3.0 llama-index-core-0.11.5 llama-index-embeddings-openai-0.2.4 llama-index-indices-managed-llama-cloud-0.3.0 llama-index-legacy-0.9.48.post3 llama-index-llms-openai-0.2.2 llama-index-multi-modal-llms-openai-0.2.0 llama-index-program-openai-0.2.0 llama-index-question-gen-openai-0.2.0 llama-index-readers-file-0.2.0 llama-index-readers-llama-parse-0.3.0 llama-parse-0.5.2 marshmallow-3.22.0 multidict-6.0.5 mypy-extensions-1.0.0 nltk-3.9.1 openai-1.43.0 pandas-2.2.2 pydantic-2.8.2 pydantic-core-2.20.1 pypdf-4.3.1 pytz-2024.1 regex-2024.7.24 striprtf-0.0.26 tenacity-8.5.0 tiktoken-0.7.0 tqdm-4.66.5 typing-extensions-4.12.2 typing-inspect-0.9.0 tzdata-2024.1 wrapt-1.16.0 yarl-1.9.11\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
      "Collecting llama-index-postprocessor-flag-embedding-reranker\n",
      "  Downloading llama_index_postprocessor_flag_embedding_reranker-0.2.0-py3-none-any.whl.metadata (714 bytes)\n",
      "Requirement already satisfied: llama-index-core<0.12.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-postprocessor-flag-embedding-reranker) (0.11.5)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.0.34)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.10.5)\n",
      "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.6.7)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.2.14)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.0.8)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2024.9.0)\n",
      "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.27.2)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.5.8)\n",
      "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.0)\n",
      "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.9.1)\n",
      "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.24.1)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (9.3.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.8.2)\n",
      "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.31.0)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (8.5.0)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.7.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (4.66.5)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (4.12.2)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.9.0)\n",
      "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.16.0)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.9.11)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (4.0.3)\n",
      "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (8.1.7)\n",
      "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.4.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2024.7.24)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.20.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2.1.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.26.13)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (2022.12.7)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.0.3)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (3.22.0)\n",
      "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (4.0.0)\n",
      "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.0.5)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (0.14.0)\n",
      "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (23.2)\n",
      "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.0->llama-index-postprocessor-flag-embedding-reranker) (1.1.3)\n",
      "Downloading llama_index_postprocessor_flag_embedding_reranker-0.2.0-py3-none-any.whl (3.0 kB)\n",
      "Installing collected packages: llama-index-postprocessor-flag-embedding-reranker\n",
      "Successfully installed llama-index-postprocessor-flag-embedding-reranker-0.2.0\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
      "Collecting git+https://github.com/FlagOpen/FlagEmbedding.git\n",
      "  Cloning https://github.com/FlagOpen/FlagEmbedding.git to /tmp/pip-req-build-g7g78sb6\n",
      "  Running command git clone --filter=blob:none --quiet https://github.com/FlagOpen/FlagEmbedding.git /tmp/pip-req-build-g7g78sb6\n",
      "  Resolved https://github.com/FlagOpen/FlagEmbedding.git to commit ddad0f9cb9a46be41fdb5d9cde47cfedf2e43241\n",
      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from FlagEmbedding==1.2.11) (2.1.0+cu118)\n",
      "Collecting transformers>=4.33.0 (from FlagEmbedding==1.2.11)\n",
      "  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting datasets (from FlagEmbedding==1.2.11)\n",
      "  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)\n",
      "Collecting accelerate>=0.20.1 (from FlagEmbedding==1.2.11)\n",
      "  Downloading accelerate-0.34.0-py3-none-any.whl.metadata (19 kB)\n",
      "Collecting sentence_transformers (from FlagEmbedding==1.2.11)\n",
      "  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)\n",
      "Collecting peft (from FlagEmbedding==1.2.11)\n",
      "  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)\n",
      "Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.1->FlagEmbedding==1.2.11) (1.24.1)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.1->FlagEmbedding==1.2.11) (23.2)\n",
      "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.1->FlagEmbedding==1.2.11) (5.9.6)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.1->FlagEmbedding==1.2.11) (6.0.1)\n",
      "Collecting huggingface-hub>=0.21.0 (from accelerate>=0.20.1->FlagEmbedding==1.2.11)\n",
      "  Downloading huggingface_hub-0.24.6-py3-none-any.whl.metadata (13 kB)\n",
      "Collecting safetensors>=0.4.3 (from accelerate>=0.20.1->FlagEmbedding==1.2.11)\n",
      "  Downloading safetensors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (3.9.0)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (4.12.2)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (1.12)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (3.0)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (3.1.2)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (2024.9.0)\n",
      "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->FlagEmbedding==1.2.11) (2.1.0)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->FlagEmbedding==1.2.11) (2024.7.24)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->FlagEmbedding==1.2.11) (2.31.0)\n",
      "Collecting tokenizers<0.20,>=0.19 (from transformers>=4.33.0->FlagEmbedding==1.2.11)\n",
      "  Downloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->FlagEmbedding==1.2.11) (4.66.5)\n",
      "Collecting pyarrow>=15.0.0 (from datasets->FlagEmbedding==1.2.11)\n",
      "  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n",
      "Collecting dill<0.3.9,>=0.3.0 (from datasets->FlagEmbedding==1.2.11)\n",
      "  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
      "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->FlagEmbedding==1.2.11) (2.2.2)\n",
      "Collecting requests (from transformers>=4.33.0->FlagEmbedding==1.2.11)\n",
      "  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
      "Collecting xxhash (from datasets->FlagEmbedding==1.2.11)\n",
      "  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
      "Collecting multiprocess (from datasets->FlagEmbedding==1.2.11)\n",
      "  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
      "Collecting fsspec (from torch>=1.6.0->FlagEmbedding==1.2.11)\n",
      "  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)\n",
      "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->FlagEmbedding==1.2.11) (3.10.5)\n",
      "Collecting scikit-learn (from sentence_transformers->FlagEmbedding==1.2.11)\n",
      "  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
      "Collecting scipy (from sentence_transformers->FlagEmbedding==1.2.11)\n",
      "  Downloading scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence_transformers->FlagEmbedding==1.2.11) (9.3.0)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (1.9.11)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->FlagEmbedding==1.2.11) (4.0.3)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.33.0->FlagEmbedding==1.2.11) (2.1.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.33.0->FlagEmbedding==1.2.11) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.33.0->FlagEmbedding==1.2.11) (1.26.13)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.33.0->FlagEmbedding==1.2.11) (2022.12.7)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->FlagEmbedding==1.2.11) (2.1.2)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->FlagEmbedding==1.2.11) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->FlagEmbedding==1.2.11) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->FlagEmbedding==1.2.11) (2024.1)\n",
      "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers->FlagEmbedding==1.2.11) (1.4.2)\n",
      "Collecting threadpoolctl>=3.1.0 (from scikit-learn->sentence_transformers->FlagEmbedding==1.2.11)\n",
      "  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)\n",
      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->FlagEmbedding==1.2.11) (1.3.0)\n",
      "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->FlagEmbedding==1.2.11) (1.16.0)\n",
      "Downloading accelerate-0.34.0-py3-none-any.whl (324 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m324.3/324.3 kB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading transformers-4.44.2-py3-none-any.whl (9.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m98.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading datasets-2.21.0-py3-none-any.whl (527 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m527.3/527.3 kB\u001b[0m \u001b[31m93.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading peft-0.12.0-py3-none-any.whl (296 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m296.4/296.4 kB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m49.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m31.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading fsspec-2024.6.1-py3-none-any.whl (177 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.6/177.6 kB\u001b[0m \u001b[31m43.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading huggingface_hub-0.24.6-py3-none-any.whl (417 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.5/417.5 kB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading requests-2.32.3-py3-none-any.whl (64 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m22.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading safetensors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (435 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m435.5/435.5 kB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m97.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
      "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.4/13.4 MB\u001b[0m \u001b[31m94.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.2/41.2 MB\u001b[0m \u001b[31m74.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)\n",
      "Building wheels for collected packages: FlagEmbedding\n",
      "  Building wheel for FlagEmbedding (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for FlagEmbedding: filename=FlagEmbedding-1.2.11-py3-none-any.whl size=1532999 sha256=e8fdb11999cd20961dbf8a2cdca1f2a8e9daae60c7ace5a2181e529e49f69801\n",
      "  Stored in directory: /tmp/pip-ephem-wheel-cache-0w5j143m/wheels/41/cf/a5/5dee96ed64e5aaffe5aa3d583828258fdefed9a305db6e7f48\n",
      "Successfully built FlagEmbedding\n",
      "Installing collected packages: xxhash, threadpoolctl, scipy, safetensors, requests, pyarrow, fsspec, dill, scikit-learn, multiprocess, huggingface-hub, tokenizers, accelerate, transformers, datasets, sentence_transformers, peft, FlagEmbedding\n",
      "  Attempting uninstall: requests\n",
      "    Found existing installation: requests 2.31.0\n",
      "    Uninstalling requests-2.31.0:\n",
      "      Successfully uninstalled requests-2.31.0\n",
      "  Attempting uninstall: fsspec\n",
      "    Found existing installation: fsspec 2024.9.0\n",
      "    Uninstalling fsspec-2024.9.0:\n",
      "      Successfully uninstalled fsspec-2024.9.0\n",
      "Successfully installed FlagEmbedding-1.2.11 accelerate-0.34.0 datasets-2.21.0 dill-0.3.8 fsspec-2024.6.1 huggingface-hub-0.24.6 multiprocess-0.70.16 peft-0.12.0 pyarrow-17.0.0 requests-2.32.3 safetensors-0.4.4 scikit-learn-1.5.1 scipy-1.14.1 sentence_transformers-3.0.1 threadpoolctl-3.5.0 tokenizers-0.19.1 transformers-4.44.2 xxhash-3.5.0\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
      "Requirement already satisfied: llama-parse in /usr/local/lib/python3.10/dist-packages (0.5.2)\n",
      "Requirement already satisfied: llama-index-core>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from llama-parse) (0.11.5)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core>=0.11.0->llama-parse) (2.0.34)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (3.10.5)\n",
      "Requirement already satisfied: dataclasses-json in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (0.6.7)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (1.2.14)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (1.0.8)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (2024.6.1)\n",
      "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (0.27.2)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (1.5.8)\n",
      "Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (3.0)\n",
      "Requirement already satisfied: nltk>3.8.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (3.9.1)\n",
      "Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (1.24.1)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (9.3.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (2.8.2)\n",
      "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (2.32.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (8.5.0)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (0.7.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (4.66.5)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (4.12.2)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (0.9.0)\n",
      "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from llama-index-core>=0.11.0->llama-parse) (1.16.0)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (2.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (1.9.11)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core>=0.11.0->llama-parse) (4.0.3)\n",
      "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core>=0.11.0->llama-parse) (8.1.7)\n",
      "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core>=0.11.0->llama-parse) (1.4.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>3.8.1->llama-index-core>=0.11.0->llama-parse) (2024.7.24)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core>=0.11.0->llama-parse) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.0->llama-index-core>=0.11.0->llama-parse) (2.20.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core>=0.11.0->llama-parse) (2.1.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core>=0.11.0->llama-parse) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core>=0.11.0->llama-parse) (1.26.13)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->llama-index-core>=0.11.0->llama-parse) (2022.12.7)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core>=0.11.0->llama-parse) (3.0.3)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->llama-index-core>=0.11.0->llama-parse) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json->llama-index-core>=0.11.0->llama-parse) (3.22.0)\n",
      "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core>=0.11.0->llama-parse) (4.0.0)\n",
      "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core>=0.11.0->llama-parse) (1.0.5)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->llama-index-core>=0.11.0->llama-parse) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-index-core>=0.11.0->llama-parse) (0.14.0)\n",
      "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core>=0.11.0->llama-parse) (23.2)\n",
      "Requirement already satisfied: exceptiongroup>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->llama-index-core>=0.11.0->llama-parse) (1.1.3)\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install llama-index\n",
    "!pip install llama-index-postprocessor-flag-embedding-reranker\n",
    "!pip install git+https://github.com/FlagOpen/FlagEmbedding.git\n",
    "!pip install llama-parse"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2024-09-05 07:01:47--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10q/uber_10q_march_2022.pdf\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 1260185 (1.2M) [application/octet-stream]\n",
      "Saving to: ‘./uber_10q_march_2022.pdf’\n",
      "\n",
      "./uber_10q_march_20 100%[===================>]   1.20M  --.-KB/s    in 0.02s   \n",
      "\n",
      "2024-09-05 07:01:48 (77.6 MB/s) - ‘./uber_10q_march_2022.pdf’ saved [1260185/1260185]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10q/uber_10q_march_2022.pdf' -O './uber_10q_march_2022.pdf'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setting API Keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio\n",
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()\n",
    "\n",
    "import os\n",
    "\n",
    "# API access to llama-cloud\n",
    "os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"\n",
    "\n",
    "# Using OpenAI API for embeddings/llms\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setting LLM and Embedding Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.core import Settings\n",
    "\n",
    "embed_model = OpenAIEmbedding(model=\"text-embedding-3-small\")\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo-0125\")\n",
    "\n",
    "Settings.llm = llm\n",
    "Settings.embed_model = embed_model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### LlamaParse PDF reader for PDF Parsing\n",
    "\n",
    "We compare two different retrieval/ queryengine strategies.\n",
    "\n",
    "1. Using raw Markdown text as nodes for building index and applying a simple query engine for generating results.\n",
    "2. Using MarkdownElementNodeParser for parsing the LlamaParse output Markdown results and building a recursive retriever query engine for generation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Started parsing the file under job_id 0ef2f65b-9cab-4ca8-b221-d20f1f6d1336\n"
     ]
    }
   ],
   "source": [
    "# LlamaParse PDF reader for PDF Parsing\n",
    "from llama_parse import LlamaParse\n",
    "\n",
    "documents = LlamaParse(result_type=\"markdown\").load_data(\n",
    "    \"./uber_10q_march_2022.pdf\"\n",
    ")\n",
    "# Started parsing the file under job_id b76a572b-d2bb-42ae-bad9-b9810049f1af"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# UNITED STATES SECURITIES AND EXCHANGE COMMISSION\n",
      "\n",
      "# Washington, D.C. 20549\n",
      "\n",
      "# FORM 10-Q\n",
      "\n",
      "(Mark One)\n",
      "\n",
      "☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n",
      "\n",
      "For the quarterly period ended March 31, 2022\n",
      "\n",
      "OR\n",
      "\n",
      "☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n",
      "\n",
      "For the transition period from_____ to _____\n",
      "\n",
      "Commission File Number: 001-38902\n",
      "\n",
      "# UBER TECHNOLOGIES, INC.\n",
      "\n",
      "(Exact name of registrant as specified in its charter)\n",
      "\n",
      "Not Applicable\n",
      "\n",
      "(Former name, former address and former fiscal year, if changed since last report)\n",
      "\n",
      "|Delaware|45-2647441|\n",
      "|---|---|\n",
      "|(State or other jurisdiction of incorporation or organization)|(I.R.S. Employer Identification No.)|\n",
      "|1515 3rd Street|San Francisco, California 94158|\n",
      "|(Address of principal executive offices, including zip code)|(415) 612-8582|\n",
      "|(Registrant’s telephone number, including area code)| |\n",
      "\n",
      "# Securities registered pursuant to Section 12(b) of the Act:\n",
      "\n",
      "|Title of each c...\n"
     ]
    }
   ],
   "source": [
    "print(documents[0].text[:1000] + \"...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3it [00:00, 41803.69it/s]\n",
      "1it [00:00, 22310.13it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 20867.18it/s]\n",
      "1it [00:00, 22429.43it/s]\n",
      "1it [00:00, 21399.51it/s]\n",
      "1it [00:00, 20460.02it/s]\n",
      "1it [00:00, 19508.39it/s]\n",
      "1it [00:00, 19508.39it/s]\n",
      "5it [00:00, 85598.04it/s]\n",
      "0it [00:00, ?it/s]\n",
      "2it [00:00, 41527.76it/s]\n",
      "2it [00:00, 46091.25it/s]\n",
      "2it [00:00, 40524.68it/s]\n",
      "2it [00:00, 38836.15it/s]\n",
      "2it [00:00, 42366.71it/s]\n",
      "2it [00:00, 41943.04it/s]\n",
      "1it [00:00, 23967.45it/s]\n",
      "1it [00:00, 24818.37it/s]\n",
      "1it [00:00, 25890.77it/s]\n",
      "4it [00:00, 72628.64it/s]\n",
      "2it [00:00, 38836.15it/s]\n",
      "3it [00:00, 41943.04it/s]\n",
      "0it [00:00, ?it/s]\n",
      "3it [00:00, 58254.22it/s]\n",
      "3it [00:00, 53773.13it/s]\n",
      "1it [00:00, 25575.02it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 26051.58it/s]\n",
      "1it [00:00, 21509.25it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 16008.79it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "2it [00:00, 42153.81it/s]\n",
      "4it [00:00, 76260.07it/s]\n",
      "5it [00:00, 75166.74it/s]\n",
      "2it [00:00, 39383.14it/s]\n",
      "2it [00:00, 39756.44it/s]\n",
      "1it [00:00, 24244.53it/s]\n",
      "2it [00:00, 42153.81it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 23045.63it/s]\n",
      "1it [00:00, 23431.87it/s]\n",
      "1it [00:00, 24528.09it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 10810.06it/s]\n",
      "2it [00:00, 8473.34it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "1it [00:00, 12633.45it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n",
      "0it [00:00, ?it/s]\n"
     ]
    }
   ],
   "source": [
    "from llama_index.core.node_parser import MarkdownElementNodeParser\n",
    "\n",
    "node_parser = MarkdownElementNodeParser(\n",
    "    llm=OpenAI(model=\"gpt-3.5-turbo-0125\"), num_workers=8\n",
    ")\n",
    "\n",
    "nodes = node_parser.get_nodes_from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_nodes, index_nodes = node_parser.get_nodes_and_objects(nodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TextNode(id_='c6ffea61-1221-40e3-b0e0-5b24cfbd02d5', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='de618b65-c78a-4390-8536-4e9e295c0e49', node_type=<ObjectType.INDEX: '3'>, metadata={'col_schema': 'Column: Delaware\\nType: string\\nSummary: State or other jurisdiction of incorporation or organization\\n\\nColumn: 45-2647441\\nType: string\\nSummary: I.R.S. Employer Identification No.'}, hash='c008153189b8dd031a3e5e694239a50ebd21f42602676f072d9746241fcef858')}, text='UNITED STATES SECURITIES AND EXCHANGE COMMISSION\\n\\n Washington, D.C. 20549\\n\\n FORM 10-Q\\n\\n(Mark One)\\n\\n☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\\n\\nFor the quarterly period ended March 31, 2022\\n\\nOR\\n\\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\\n\\nFor the transition period from_____ to _____\\n\\nCommission File Number: 001-38902\\n\\n UBER TECHNOLOGIES, INC.\\n\\n(Exact name of registrant as specified in its charter)\\n\\nNot Applicable\\n\\n(Former name, former address and former fiscal year, if changed since last report)', mimetype='text/plain', start_char_idx=1, end_char_idx=595, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text_nodes[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "IndexNode(id_='de618b65-c78a-4390-8536-4e9e295c0e49', embedding=None, metadata={'col_schema': 'Column: Delaware\\nType: string\\nSummary: State or other jurisdiction of incorporation or organization\\n\\nColumn: 45-2647441\\nType: string\\nSummary: I.R.S. Employer Identification No.'}, excluded_embed_metadata_keys=['col_schema'], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='c6ffea61-1221-40e3-b0e0-5b24cfbd02d5', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='0cafbb2bbffe3085738e748c9ed19c5b88f6b300d876820fc3caa7afa8f0627f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='c57f8dab-7b69-4850-8885-6a9cf0f531f9', node_type=<ObjectType.TEXT: '1'>, metadata={'table_df': \"{'Delaware': {0: '(State or other jurisdiction of incorporation or organization)', 1: '1515 3rd Street', 2: '(Address of principal executive offices, including zip code)', 3: '(Registrant’s telephone number, including area code)'}, '45-2647441': {0: '(I.R.S. Employer Identification No.)', 1: 'San Francisco, California 94158', 2: '(415) 612-8582', 3: ' '}}\", 'table_summary': \"Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\\nwith the following columns:\\n- Delaware: State or other jurisdiction of incorporation or organization\\n- 45-2647441: I.R.S. Employer Identification No.\\n\"}, hash='fadc844962620525c1d3c8d7ff1693a090642818928f9ce7600117258a39aa04')}, text=\"Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\\nwith the following columns:\\n- Delaware: State or other jurisdiction of incorporation or organization\\n- 45-2647441: I.R.S. Employer Identification No.\\n\", mimetype='text/plain', start_char_idx=601, end_char_idx=919, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n', index_id='c57f8dab-7b69-4850-8885-6a9cf0f531f9', obj=TextNode(id_='c57f8dab-7b69-4850-8885-6a9cf0f531f9', embedding=None, metadata={'table_df': \"{'Delaware': {0: '(State or other jurisdiction of incorporation or organization)', 1: '1515 3rd Street', 2: '(Address of principal executive offices, including zip code)', 3: '(Registrant’s telephone number, including area code)'}, '45-2647441': {0: '(I.R.S. Employer Identification No.)', 1: 'San Francisco, California 94158', 2: '(415) 612-8582', 3: ' '}}\", 'table_summary': \"Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\\nwith the following columns:\\n- Delaware: State or other jurisdiction of incorporation or organization\\n- 45-2647441: I.R.S. Employer Identification No.\\n\"}, excluded_embed_metadata_keys=['table_df', 'table_summary'], excluded_llm_metadata_keys=['table_df', 'table_summary'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='de618b65-c78a-4390-8536-4e9e295c0e49', node_type=<ObjectType.INDEX: '3'>, metadata={'col_schema': 'Column: Delaware\\nType: string\\nSummary: State or other jurisdiction of incorporation or organization\\n\\nColumn: 45-2647441\\nType: string\\nSummary: I.R.S. Employer Identification No.'}, hash='c008153189b8dd031a3e5e694239a50ebd21f42602676f072d9746241fcef858'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='c0fa90a9-fe14-46fa-8434-cd70e134d40e', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='cc6b1c09572e3a0bd06a93ccea32a5562b36a393a36ede3852f4a5fc946c51fd')}, text=\"Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\\nwith the following columns:\\n- Delaware: State or other jurisdiction of incorporation or organization\\n- 45-2647441: I.R.S. Employer Identification No.\\n\\n|Delaware|45-2647441|\\n|---|---|\\n|(State or other jurisdiction of incorporation or organization)|(I.R.S. Employer Identification No.)|\\n|1515 3rd Street|San Francisco, California 94158|\\n|(Address of principal executive offices, including zip code)|(415) 612-8582|\\n|(Registrant’s telephone number, including area code)| |\\n\", mimetype='text/plain', start_char_idx=601, end_char_idx=919, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'))"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index_nodes[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Build Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "recursive_index = VectorStoreIndex(nodes=text_nodes + index_nodes)\n",
    "raw_index = VectorStoreIndex.from_documents(documents)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create Query Engines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d082846d028644899427f729f182ec05",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aee5e54b678844a49250315c157e8750",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b4cb508e9392472ca2b1a1113a3f3585",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e29351d2f7944b7fab381bcd98b31462",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a155d5cdd6849458690bd2a36ba45cc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4e2abbef9639464e935d0252196e83c9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from llama_index.postprocessor.flag_embedding_reranker import (\n",
    "    FlagEmbeddingReranker,\n",
    ")\n",
    "\n",
    "reranker = FlagEmbeddingReranker(\n",
    "    top_n=5,\n",
    "    model=\"BAAI/bge-reranker-large\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "recursive_query_engine = recursive_index.as_query_engine(\n",
    "    similarity_top_k=15, node_postprocessors=[reranker], verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_query_engine = raw_index.as_query_engine(\n",
    "    similarity_top_k=15, node_postprocessors=[reranker]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Querying with two different query engines\n",
    "\n",
    "we compare base query engine vs recursive query engine with tables"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Table Query Task: Queries for Table Question Answering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "************New LlamaParse+ Basic Query Engine************\n",
      "The change in free cash flow from the financial and operational highlights is an increase of $826 million, from a net cash used in operating activities of $611 million in 2021 to net cash provided by operating activities of $215 million in 2022. The rate of this change is a positive improvement.\n",
      "\u001b[1;3;38;2;11;159;203mRetrieval entering 015f9778-1f7c-44cd-9e26-90f2c9e21550: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering 5e8febd0-0c43-4552-9499-9465674b8877: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering d3c8d59b-9d7e-4088-94e9-3e58aba09f10: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering 25385e8f-24df-4660-959b-c499cc220246: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering 6b48fe6f-a60e-425a-aba5-22b62d1b4512: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering aa60761d-1d8a-4896-aef9-e41553f17558: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering d0634d58-0589-47cb-9921-d2d57d88240f: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\u001b[1;3;38;2;11;159;203mRetrieval entering 492b36c1-8b39-4db9-8dca-dd9f6c488a9d: TextNode\n",
      "\u001b[0m\u001b[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?\n",
      "\u001b[0m\n",
      "************New LlamaParse+ Recursive Retriever Query Engine************\n",
      "The change in free cash flow from 2021 to 2022 is an increase of $635 million. This change represents a significant improvement in free cash flow performance over the period.\n"
     ]
    }
   ],
   "source": [
    "query = \"What is the change of free cash flow and what is the rate from the financial and operational highlights?\"\n",
    "\n",
    "response_1 = raw_query_engine.query(query)\n",
    "print(\"\\n************New LlamaParse+ Basic Query Engine************\")\n",
    "print(response_1)\n",
    "\n",
    "response_2 = recursive_query_engine.query(query)\n",
    "print(\n",
    "    \"\\n************New LlamaParse+ Recursive Retriever Query Engine************\"\n",
    ")\n",
    "print(response_2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
